The MNIST database of handwritten digits in latent space¶

Model training¶

In [1]:
# data
import numpy as np 
import pandas as pd

#PCA
from sklearn.decomposition import PCA
#LDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
#TSNE
from sklearn.manifold import TSNE
#UMAP
import umap

# torch
import torch
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader,random_split
from torch import nn

# visual
import matplotlib.pyplot as plt 
import seaborn as sns
from tqdm import tqdm
import plotly.express as px

# general
import copy
import time
import random
import os
In [2]:
# set seed
def set_seed() -> None:
    seed = 0
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed
    os.environ["PYTHONHASHSEED"] = str(seed)
    print(f"Random seed set as {seed}")

Data¶

In [3]:
set_seed()
path_data = 'data'

# load the MNIST dataset from torchvision
train_dataset = torchvision.datasets.MNIST(path_data, train=True, download=True)
test_dataset  = torchvision.datasets.MNIST(path_data, train=False, download=True)

print('len train_dataset: ', len(train_dataset))
print('len test_dataset: ', len(test_dataset))
print(f'image size: {test_dataset[0][0].size} = {test_dataset[0][0].size[0]*test_dataset[0][0].size[1]}')
Random seed set as 0
len train_dataset:  60000
len test_dataset:  10000
image size: (28, 28) = 784
In [4]:
# inspect the data
fig = plt.figure()
for i in range(36):
  plt.subplot(6,6,i+1)
  plt.tight_layout()
  plt.imshow(train_dataset[i][0], cmap='gray', interpolation='none')

  plt.title(f"GT: {train_dataset[i][1]}")
  plt.xticks([])
  plt.yticks([])
In [5]:
# transform to tensor
train_transform = transforms.Compose([
transforms.ToTensor(),
])

test_transform = transforms.Compose([
transforms.ToTensor(),
])

train_dataset.transform = train_transform
test_dataset.transform = test_transform

i=len(train_dataset)

# split data
train_data, val_data = random_split(train_dataset, [int(i-i*0.2), int(i*0.2)])
batch_size = 512

# init loader
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size)
valid_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size,shuffle=True)

Model¶

In [6]:
# init the model
class Encoder(nn.Module):

    def __init__(self, encoded_space_dim):
        super().__init__()

        self.encoder_cnn = nn.Sequential(
            nn.Conv2d(1, 8, 3, stride=2, padding=1),
            nn.BatchNorm2d(8),
            nn.ReLU(),
            nn.Conv2d(8, 16, 3, stride=2, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Conv2d(16, 32, 3, stride=2, padding=0),
            nn.ReLU()
        )

        ### Flatten data
        self.flatten = nn.Flatten(start_dim=1)
        ### Linear section
        self.encoder_lin = nn.Sequential(
            nn.Linear(288, 128),
            nn.ReLU(),
            nn.Linear(128, encoded_space_dim)
        )
    def forward(self, x):
        x = self.encoder_cnn(x)
        x = self.flatten(x)
        x = self.encoder_lin(x)
        return x

class Decoder(nn.Module):
    def __init__(self, encoded_space_dim):
        super().__init__()
        self.decoder_lin = nn.Sequential(
            nn.Linear(encoded_space_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 288), 
            nn.ReLU()
        )

        self.unflatten = nn.Unflatten(dim=1,unflattened_size=(32, 3, 3))

        self.decoder_conv = nn.Sequential(
            nn.ConvTranspose2d(32, 16, 3,
                               stride=2, output_padding=0),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.ConvTranspose2d(16, 8, 3, stride=2,
                               padding=1, output_padding=1),
            nn.BatchNorm2d(8),
            nn.ReLU(),
            nn.ConvTranspose2d(8, 1, 3, stride=2,
                               padding=1, output_padding=1)
        )

    def forward(self, x):
        x = self.decoder_lin(x)
        x = self.unflatten(x)
        x = self.decoder_conv(x)
        x = torch.sigmoid(x)
        return x

Setup¶

In [7]:
# training function
def train_epoch(encoder, decoder, device, dataloader, loss_fn, optimizer):
    # Set train mode for both the encoder and the decoder
    encoder.train()
    decoder.train()
    train_loss = []
    # Iterate the dataloader
    for image_batch, _ in dataloader:
        image_batch = image_batch.to(device)
        # Encode data
        encoded_data = encoder(image_batch)
        # Decode data
        decoded_data = decoder(encoded_data)
        # Evaluate loss
        loss = loss_fn(decoded_data, image_batch)
        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Print batch loss
        train_loss.append(loss.detach().cpu().numpy())

    return np.mean(train_loss)



### testing function
def test_epoch(encoder, decoder, device, dataloader, loss_fn):
    # Set evaluation mode for encoder and decoder
    encoder.eval()
    decoder.eval()
    with torch.no_grad(): # No need to track the gradients
        # Define the lists to store the outputs for each batch
        conc_out = []
        conc_label = []
        for image_batch, _ in dataloader:
            # Move tensor to the proper device
            image_batch = image_batch.to(device)
            # Encode data
            encoded_data = encoder(image_batch)
            # Decode data
            decoded_data = decoder(encoded_data)
            # Append the network output and the original image to the lists
            conc_out.append(decoded_data.cpu())
            conc_label.append(image_batch.cpu())
        # Create a single tensor with all the values in the lists
        conc_out = torch.cat(conc_out)
        conc_label = torch.cat(conc_label)
        # Evaluate global loss
        val_loss = loss_fn(conc_out, conc_label)
    return val_loss.data
In [8]:
dims = [2,4,8,16,32,64,128]
encoders = []
test_loss_res=[]
for d in dims:
    # set seed
    set_seed()

    # learning setup
    loss_fn = torch.nn.MSELoss()
    lr = 0.001

    # select GPU ... if accessible
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    print(f'Selected device: {device}')

    encoder = Encoder(encoded_space_dim=d)
    decoder = Decoder(encoded_space_dim=d)
    encoder.to(device)
    decoder.to(device)

    # optimizer
    params_to_optimize = [
        {'params': encoder.parameters()},
        {'params': decoder.parameters()}
    ]
    optim = torch.optim.Adam(params_to_optimize, lr=lr, weight_decay=1e-05)

    num_epochs = 20
    for epoch in range(num_epochs):
       train_loss = train_epoch(encoder,decoder,device,
       train_loader,loss_fn,optim)
       val_loss = test_epoch(encoder,decoder,device,valid_loader,loss_fn)
       print('\n EPOCH {}/{} \t train loss {} \t val loss {}'.format(epoch + 1, num_epochs,train_loss,val_loss))

    test_loss = test_epoch(encoder,decoder,device,test_loader,loss_fn)
    test_loss = test_loss.cpu().numpy()
    print('Final test loss:', test_loss)

    encoders.append(copy.deepcopy(encoder))
    test_loss_res.append(test_loss)
    
    torch.save(encoder.state_dict(), 'models/encoder_'+str(d)+'.pt')

df = pd.DataFrame(data={'test_loss': test_loss_res, 'dimensions': dims})
Random seed set as 0
Selected device: cuda

 EPOCH 1/20 	 train loss 0.1298900693655014 	 val loss 0.08976574242115021

 EPOCH 2/20 	 train loss 0.07241076976060867 	 val loss 0.060408130288124084

 EPOCH 3/20 	 train loss 0.054600756615400314 	 val loss 0.050632670521736145

 EPOCH 4/20 	 train loss 0.04813282936811447 	 val loss 0.04637322202324867

 EPOCH 5/20 	 train loss 0.045157190412282944 	 val loss 0.04409974440932274

 EPOCH 6/20 	 train loss 0.04346778616309166 	 val loss 0.042812883853912354

 EPOCH 7/20 	 train loss 0.04241235554218292 	 val loss 0.04207753390073776

 EPOCH 8/20 	 train loss 0.041655283421278 	 val loss 0.04150001332163811

 EPOCH 9/20 	 train loss 0.04106437414884567 	 val loss 0.04100542888045311

 EPOCH 10/20 	 train loss 0.04062018543481827 	 val loss 0.04067697748541832

 EPOCH 11/20 	 train loss 0.040207792073488235 	 val loss 0.0403188019990921

 EPOCH 12/20 	 train loss 0.03987923637032509 	 val loss 0.0402093306183815

 EPOCH 13/20 	 train loss 0.039599549025297165 	 val loss 0.03993653133511543

 EPOCH 14/20 	 train loss 0.03932199999690056 	 val loss 0.03963366150856018

 EPOCH 15/20 	 train loss 0.03910810500383377 	 val loss 0.039464205503463745

 EPOCH 16/20 	 train loss 0.03886517137289047 	 val loss 0.039253927767276764

 EPOCH 17/20 	 train loss 0.03866118937730789 	 val loss 0.03916754573583603

 EPOCH 18/20 	 train loss 0.03847654163837433 	 val loss 0.0389322005212307

 EPOCH 19/20 	 train loss 0.03834862634539604 	 val loss 0.03880877047777176

 EPOCH 20/20 	 train loss 0.03821360319852829 	 val loss 0.038679495453834534
Final test loss: 0.038866103
Random seed set as 0
Selected device: cuda

 EPOCH 1/20 	 train loss 0.16211728751659393 	 val loss 0.10671469569206238

 EPOCH 2/20 	 train loss 0.08306632190942764 	 val loss 0.06550020724534988

 EPOCH 3/20 	 train loss 0.05811253935098648 	 val loss 0.05250312760472298

 EPOCH 4/20 	 train loss 0.04884061962366104 	 val loss 0.045622892677783966

 EPOCH 5/20 	 train loss 0.042878348380327225 	 val loss 0.04020098224282265

 EPOCH 6/20 	 train loss 0.038085177540779114 	 val loss 0.03622526302933693

 EPOCH 7/20 	 train loss 0.03482360765337944 	 val loss 0.033692214637994766

 EPOCH 8/20 	 train loss 0.03287120535969734 	 val loss 0.03218082711100578

 EPOCH 9/20 	 train loss 0.03161238878965378 	 val loss 0.031188420951366425

 EPOCH 10/20 	 train loss 0.030718861147761345 	 val loss 0.03047061711549759

 EPOCH 11/20 	 train loss 0.030024848878383636 	 val loss 0.029929405078291893

 EPOCH 12/20 	 train loss 0.029492419213056564 	 val loss 0.029495984315872192

 EPOCH 13/20 	 train loss 0.029075004160404205 	 val loss 0.029145710170269012

 EPOCH 14/20 	 train loss 0.02869706228375435 	 val loss 0.028838714584708214

 EPOCH 15/20 	 train loss 0.028392387554049492 	 val loss 0.02853161282837391

 EPOCH 16/20 	 train loss 0.02810506895184517 	 val loss 0.028297824785113335

 EPOCH 17/20 	 train loss 0.02787167951464653 	 val loss 0.02814660035073757

 EPOCH 18/20 	 train loss 0.02766505628824234 	 val loss 0.027986207976937294

 EPOCH 19/20 	 train loss 0.027483994141221046 	 val loss 0.02782360091805458

 EPOCH 20/20 	 train loss 0.027339963242411613 	 val loss 0.027712976559996605
Final test loss: 0.02766888
Random seed set as 0
Selected device: cuda

 EPOCH 1/20 	 train loss 0.17490415275096893 	 val loss 0.12061089277267456

 EPOCH 2/20 	 train loss 0.08592692017555237 	 val loss 0.06500912457704544

 EPOCH 3/20 	 train loss 0.05494464561343193 	 val loss 0.04558859393000603

 EPOCH 4/20 	 train loss 0.03835904225707054 	 val loss 0.03251131623983383

 EPOCH 5/20 	 train loss 0.029524216428399086 	 val loss 0.027222251519560814

 EPOCH 6/20 	 train loss 0.02577357366681099 	 val loss 0.024541864171624184

 EPOCH 7/20 	 train loss 0.02369610033929348 	 val loss 0.02306196093559265

 EPOCH 8/20 	 train loss 0.02230926789343357 	 val loss 0.021805396303534508

 EPOCH 9/20 	 train loss 0.02129039354622364 	 val loss 0.02098926715552807

 EPOCH 10/20 	 train loss 0.020557086914777756 	 val loss 0.02044609747827053

 EPOCH 11/20 	 train loss 0.01999804936349392 	 val loss 0.020025338977575302

 EPOCH 12/20 	 train loss 0.01949087157845497 	 val loss 0.019532442092895508

 EPOCH 13/20 	 train loss 0.019108330830931664 	 val loss 0.01916714571416378

 EPOCH 14/20 	 train loss 0.01875525526702404 	 val loss 0.018803788349032402

 EPOCH 15/20 	 train loss 0.018450306728482246 	 val loss 0.018546177074313164

 EPOCH 16/20 	 train loss 0.018156077712774277 	 val loss 0.018611768260598183

 EPOCH 17/20 	 train loss 0.017920583486557007 	 val loss 0.0180879645049572

 EPOCH 18/20 	 train loss 0.0176972858607769 	 val loss 0.017874743789434433

 EPOCH 19/20 	 train loss 0.01749100536108017 	 val loss 0.017706599086523056

 EPOCH 20/20 	 train loss 0.017286108806729317 	 val loss 0.01749982312321663
Final test loss: 0.017223055
Random seed set as 0
Selected device: cuda

 EPOCH 1/20 	 train loss 0.1378922015428543 	 val loss 0.09271116554737091

 EPOCH 2/20 	 train loss 0.07292484492063522 	 val loss 0.058733005076646805

 EPOCH 3/20 	 train loss 0.05277152359485626 	 val loss 0.048095911741256714

 EPOCH 4/20 	 train loss 0.04501009359955788 	 val loss 0.04255345091223717

 EPOCH 5/20 	 train loss 0.040984682738780975 	 val loss 0.03920528665184975

 EPOCH 6/20 	 train loss 0.03817429766058922 	 val loss 0.03662383556365967

 EPOCH 7/20 	 train loss 0.03526986762881279 	 val loss 0.03357996419072151

 EPOCH 8/20 	 train loss 0.03200535476207733 	 val loss 0.030301492661237717

 EPOCH 9/20 	 train loss 0.02615249902009964 	 val loss 0.02190486714243889

 EPOCH 10/20 	 train loss 0.01923142559826374 	 val loss 0.017371756955981255

 EPOCH 11/20 	 train loss 0.016339531168341637 	 val loss 0.015543726272881031

 EPOCH 12/20 	 train loss 0.01480888668447733 	 val loss 0.014499901793897152

 EPOCH 13/20 	 train loss 0.013854985125362873 	 val loss 0.013786638155579567

 EPOCH 14/20 	 train loss 0.013179730623960495 	 val loss 0.013094027526676655

 EPOCH 15/20 	 train loss 0.012620325200259686 	 val loss 0.012586156837642193

 EPOCH 16/20 	 train loss 0.01220066100358963 	 val loss 0.012238304130733013

 EPOCH 17/20 	 train loss 0.01188776083290577 	 val loss 0.012016982771456242

 EPOCH 18/20 	 train loss 0.011615419760346413 	 val loss 0.011909388937056065

 EPOCH 19/20 	 train loss 0.011364178732037544 	 val loss 0.011712637729942799

 EPOCH 20/20 	 train loss 0.011140997521579266 	 val loss 0.011365707032382488
Final test loss: 0.011028641
Random seed set as 0
Selected device: cuda

 EPOCH 1/20 	 train loss 0.13850729167461395 	 val loss 0.09721913188695908

 EPOCH 2/20 	 train loss 0.07488642632961273 	 val loss 0.05607090890407562

 EPOCH 3/20 	 train loss 0.04519077017903328 	 val loss 0.034955721348524094

 EPOCH 4/20 	 train loss 0.029479466378688812 	 val loss 0.024394024163484573

 EPOCH 5/20 	 train loss 0.021471340209245682 	 val loss 0.018727505579590797

 EPOCH 6/20 	 train loss 0.017249353229999542 	 val loss 0.015681350603699684

 EPOCH 7/20 	 train loss 0.014697390608489513 	 val loss 0.013752096332609653

 EPOCH 8/20 	 train loss 0.013035559095442295 	 val loss 0.012365826405584812

 EPOCH 9/20 	 train loss 0.01184671651571989 	 val loss 0.011332928203046322

 EPOCH 10/20 	 train loss 0.010971290990710258 	 val loss 0.010649077594280243

 EPOCH 11/20 	 train loss 0.010419031605124474 	 val loss 0.010332510806620121

 EPOCH 12/20 	 train loss 0.009845918044447899 	 val loss 0.009640068747103214

 EPOCH 13/20 	 train loss 0.0094508221372962 	 val loss 0.00936348456889391

 EPOCH 14/20 	 train loss 0.009120824746787548 	 val loss 0.008912383578717709

 EPOCH 15/20 	 train loss 0.008832087740302086 	 val loss 0.008793244138360023

 EPOCH 16/20 	 train loss 0.008574086241424084 	 val loss 0.008609356358647346

 EPOCH 17/20 	 train loss 0.008276860229671001 	 val loss 0.008277342654764652

 EPOCH 18/20 	 train loss 0.008088107220828533 	 val loss 0.008039719425141811

 EPOCH 19/20 	 train loss 0.0079641779884696 	 val loss 0.008045131340622902

 EPOCH 20/20 	 train loss 0.007827327586710453 	 val loss 0.007921675220131874
Final test loss: 0.007607157
Random seed set as 0
Selected device: cuda

 EPOCH 1/20 	 train loss 0.15862125158309937 	 val loss 0.11813093721866608

 EPOCH 2/20 	 train loss 0.08929985016584396 	 val loss 0.06675083190202713

 EPOCH 3/20 	 train loss 0.056322935968637466 	 val loss 0.044012729078531265

 EPOCH 4/20 	 train loss 0.0353868268430233 	 val loss 0.028032777830958366

 EPOCH 5/20 	 train loss 0.023657560348510742 	 val loss 0.019827306270599365

 EPOCH 6/20 	 train loss 0.017899880185723305 	 val loss 0.01625004969537258

 EPOCH 7/20 	 train loss 0.014415382407605648 	 val loss 0.013234635815024376

 EPOCH 8/20 	 train loss 0.01223067007958889 	 val loss 0.011191233061254025

 EPOCH 9/20 	 train loss 0.010778682306408882 	 val loss 0.010819059796631336

 EPOCH 10/20 	 train loss 0.00964411348104477 	 val loss 0.009211997501552105

 EPOCH 11/20 	 train loss 0.008793789893388748 	 val loss 0.008468338288366795

 EPOCH 12/20 	 train loss 0.008259238675236702 	 val loss 0.007879016920924187

 EPOCH 13/20 	 train loss 0.007756507955491543 	 val loss 0.007415542379021645

 EPOCH 14/20 	 train loss 0.007239592261612415 	 val loss 0.00717651704326272

 EPOCH 15/20 	 train loss 0.006866370793431997 	 val loss 0.00673716003075242

 EPOCH 16/20 	 train loss 0.006654381286352873 	 val loss 0.009829923510551453

 EPOCH 17/20 	 train loss 0.0065794214606285095 	 val loss 0.006210580468177795

 EPOCH 18/20 	 train loss 0.006172514986246824 	 val loss 0.006126122083514929

 EPOCH 19/20 	 train loss 0.005898968316614628 	 val loss 0.006139357574284077

 EPOCH 20/20 	 train loss 0.005791765172034502 	 val loss 0.005696031264960766
Final test loss: 0.0054445667
Random seed set as 0
Selected device: cuda

 EPOCH 1/20 	 train loss 0.24610580503940582 	 val loss 0.17638134956359863

 EPOCH 2/20 	 train loss 0.12470408529043198 	 val loss 0.08018303662538528

 EPOCH 3/20 	 train loss 0.05923640355467796 	 val loss 0.04509679228067398

 EPOCH 4/20 	 train loss 0.03994816541671753 	 val loss 0.03558974713087082

 EPOCH 5/20 	 train loss 0.03352092579007149 	 val loss 0.03139674663543701

 EPOCH 6/20 	 train loss 0.030050383880734444 	 val loss 0.02864030934870243

 EPOCH 7/20 	 train loss 0.027546964585781097 	 val loss 0.02631637454032898

 EPOCH 8/20 	 train loss 0.025229519233107567 	 val loss 0.0236248429864645

 EPOCH 9/20 	 train loss 0.022127410396933556 	 val loss 0.020411890000104904

 EPOCH 10/20 	 train loss 0.01740909181535244 	 val loss 0.014707587659358978

 EPOCH 11/20 	 train loss 0.012917694635689259 	 val loss 0.011221327818930149

 EPOCH 12/20 	 train loss 0.01034642942249775 	 val loss 0.009393468499183655

 EPOCH 13/20 	 train loss 0.008820222690701485 	 val loss 0.008233228698372841

 EPOCH 14/20 	 train loss 0.007928725332021713 	 val loss 0.007541121914982796

 EPOCH 15/20 	 train loss 0.007211635820567608 	 val loss 0.006902025546878576

 EPOCH 16/20 	 train loss 0.006774909794330597 	 val loss 0.006621525157243013

 EPOCH 17/20 	 train loss 0.006408374290913343 	 val loss 0.006229662336409092

 EPOCH 18/20 	 train loss 0.00612502358853817 	 val loss 0.0059707434847950935

 EPOCH 19/20 	 train loss 0.005823879968374968 	 val loss 0.0058191027492284775

 EPOCH 20/20 	 train loss 0.005649421364068985 	 val loss 0.00585360499098897
Final test loss: 0.0056347596
In [9]:
# plot the trained models
fig = px.line(df, x="dimensions", y="test_loss", title='Testdata: loss vs. latent dimensions')
fig.show()

Visualization¶

In [10]:
# load the trained models
encoders = []
for d in dims:  
    encoder = Encoder(encoded_space_dim=d).to(device)
    encoder.load_state_dict(torch.load('models/encoder_'+str(d)+'.pt'))
    encoders.append(encoder)
In [11]:
# run model on test data => get latent space
def get_encoded_df(encoder):
    encoded_samples = []
    for sample in tqdm(test_dataset):
        img = sample[0].unsqueeze(0).to(device)
        label = sample[1]
        # Encode image
        encoder.eval()
        with torch.no_grad():
            encoded_img  = encoder(img)
        # Append to list
        encoded_img = encoded_img.flatten().cpu().numpy()
        encoded_sample = {f"dim {i}": enc for i, enc in enumerate(encoded_img)}
        encoded_sample['label'] = label
        encoded_samples.append(encoded_sample)
    encoded_samples = pd.DataFrame(encoded_samples)
    return encoded_samples
In [12]:
# plot the model of the 2 dimensional latent space
encoded_samples = get_encoded_df(encoders[0])

px.scatter(encoded_samples, x='dim 0', y='dim 1', labels={'dim 0': 'dim 1', 'dim 1': 'dim 2'},
           color=encoded_samples.label.astype(str)) # opacity=0.7
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10000/10000 [00:05<00:00, 1850.26it/s]

Comparison of different unsupervised learning algorithms for dimension reduction¶

In [13]:
# load encoder of 32 dim embedding
encoded_samples = get_encoded_df(encoders[4])
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10000/10000 [00:05<00:00, 1830.65it/s]
In [14]:
x = encoded_samples.iloc[:,:-1]
y = encoded_samples.iloc[:,-1]

pca¶

In [15]:
start = time.time()
pca = PCA(n_components=2)
principalComponents = pca.fit_transform(x)
print(f'Duration: {time.time() - start} seconds')
pca_df = pd.DataFrame(data = principalComponents
             , columns = ['pc_1', 'pc_2'])

fig = px.scatter(pca_df, x='pc_1', y='pc_2',
                 color=encoded_samples.label.astype(str))
fig.show()
Duration: 0.06872749328613281 seconds

lda¶

In [16]:
start = time.time()
lda = LDA(n_components=2).fit_transform(x,y)
print(f'Duration: {time.time() - start} seconds')

pca_df = pd.DataFrame(data = lda
             , columns = ['lda_1', 'lda_2'])

fig = px.scatter(pca_df, x='lda_1', y='lda_2',
                 color=encoded_samples.label.astype(str))
fig.show()
Duration: 0.056876420974731445 seconds

t-sne¶

In [17]:
start = time.time()
tsne = TSNE(random_state = 0, n_components=2, verbose=0, perplexity=30, n_iter=1000)
tsne_results = tsne.fit_transform(x)
print(f'Duration: {time.time() - start} seconds')

fig = px.scatter(tsne_results, x=0, y=1,
                 color=encoded_samples.label.astype(str),
                 labels={'0': 'tsne 1', '1': 'tsne 2'})
fig.show()
Duration: 14.396714210510254 seconds

umap¶

In [18]:
start = time.time()
reducer = umap.UMAP(random_state=42,n_components=2)
embedding = reducer.fit_transform(x)
print('Duration: {} seconds'.format(time.time() - start))

umap_df = pd.DataFrame(data = embedding
             , columns = ['umap_1', 'umap_2'])

fig = px.scatter(umap_df, x='umap_1', y='umap_2',
                 color=encoded_samples.label.astype(str))
fig.show()
Duration: 18.98189115524292 seconds